##
## Initialize the principal dataset
## (Nicolas Klotz)
##

# Start from an empty global environment so that objects left over from an
# earlier run cannot leak into the build. `all.names = TRUE` makes ls() also
# return names that begin with a dot, so those objects are removed as well.
# The argument is spelled out in full instead of relying on partial matching
# of `all`.
# NOTE: this wipes the whole workspace of the calling session.
rm(list = ls(all.names = TRUE))

library(plyr)
library(car)
library(reshape2)
library(gdata)
library(countrycode)
library(WDI) # import world bank WDI data
library(foreign)
library(lubridate)

# Project root: every relative "Daten/..." path used by the source() and save()
# calls below is resolved against this directory.
# NOTE(review): the absolute path is machine-specific; it has to be adjusted
# before the script can run on another computer.
setwd("/Users/nklotz/Documents/Studium/SS2014/Masterarbeit")

# ---- Base data ----
# The operationalization scripts are run one after another, in this fixed
# order, via source().

# Import the initial SVAC data and delete unneeded variables
source("Daten/Operationalisierung - Clean SVAC.R")
# Dependent variable
source("Daten/Operationalisierung - Abhängige Variable.R")

# ---- Independent variables ----

# Reciprocity (takes a long time!)
source("Daten/Operationalisierung - Reciprocity.R")
# Optional checkpoint after the slow reciprocity step:
# save(Master, file="Daten/Master_reciprocity.RData")

# Construct actor dummies
source("Daten/Operationalisierung - Actor Dummies.R")
source("Daten/Operationalisierung - Autoregressive.R")
# Conflict duration (also takes some time)
source("Daten/Operationalisierung - Conflict duration.R")
source("Daten/Operationalisierung - Conflict type Enemy type.R")
source("Daten/Operationalisierung - Ethnic fractionalization.R")
source("Daten/Operationalisierung - Global Militarization Index.R")
source("Daten/Operationalisierung - Lootable Resources.R")
source("Daten/Operationalisierung - Mountainous terrain.R")
source("Daten/Operationalisierung - Non-State Actor Characteristics.R")
source("Daten/Operationalisierung - Population.R")
source("Daten/Operationalisierung - Societal Inequalities.R")
source("Daten/Operationalisierung - State Capacity.R")
source("Daten/Operationalisierung - Troop Quality.R")
source("Daten/Operationalisierung - War aim.R")

# Join UCDP External Support data with SVAC data
source("Daten/Operationalisierung - External Support.R")
# Construct variables indicating other forms of political violence
source("Daten/Operationalisierung - Other Violence.R")
# Construct conflict intensity variables (battle-related deaths)
source("Daten/Operationalisierung - Conflictintensinty.R")
# Construct gender inequality variables
source("Daten/Operationalisierung - Gender Inequality.R")
# Construct democracy level variables
source("Daten/Operationalisierung - Democracy.R")
# Construct territorial control variables
source("Daten/Operationalisierung - Territorial Control.R")


# Inspect & delete possible duplicates

# View(Master[duplicated(Master[c("actorid", "year", "conflictid")]),])
# Master <- Master[!duplicated(Master[c("actorid", "year", "conflictid")]),]

# Save different data sets
# Full dataset with all actors.
save(Master, file = "Daten/Master_all.RData")

# Keep only state actors. which() discards rows where the comparison is NA;
# indexing with the raw logical vector would instead insert an all-NA row for
# every NA in actor_type_state.
states <- Master[which(Master$actor_type_state == 1), ]
# Delete pgm_id (as it is NA for every state).
states <- subset(states, select = -pgm_id)

# Run the state-specific initialization script.
# NOTE(review): presumably this script modifies `states`, which is saved right
# afterwards -- verify. Also, unlike every other script sourced in this file,
# its name starts with "Operationalization" rather than "Operationalisierung";
# confirm that the file exists under exactly this name.
source(file = "Daten/Operationalization - Data Initialization - States.R")

# Only state actors.
save(states, file = "Daten/Master_states.RData")

# list rows of data that have missing values 
# View(states[!complete.cases(states),])
# nrow(states[!complete.cases(states),]) # 2770
# nrow(states) # 2779

# Create a new dataset without missing data (listwise deletion): na.omit()
# removes the incomplete cases from `states`.
# NOTE(review): the row counts recorded in the comments above (2770 incomplete
# rows out of 2779) suggest that very few rows survive -- confirm that this is
# intended before using states.na for analysis.
states.na <- na.omit(states)
# Store the complete-case subset in its own file.
save(states.na, file="Daten/Master_states.na.RData")

# - statistics about the occurrence of sv
# - " by actor_type

# subset: only state actors!

# perform listwise deletion, then compute statistics
#   - how many states
#   - how many conflicts
